# -*- coding: utf-8 -*-
import copy
import math
import random

import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from datetime import datetime

from scrapy.utils.project import get_project_settings
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from hsysmall.rules import parse_catalog, parse_supplier, parse_sku_list, parse_total_page, parse_query_info
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that collects the catalog and the per-supplier SKU listings.

    Flow, as implemented below:

    1. ``start_requests`` fetches the index page.
    2. ``parse_catalog`` emits the catalog and issues a page-1 listing request
       for every supplier found in the ``SUPPLIERS`` project setting.
    3. ``parse_sku_list`` emits the SKUs of page 1 and then either paginates
       directly or, when the reported page count reaches ``max_page_limit``,
       narrows the query by price range and starts over for each sub-range.
    4. ``parse_more_sku`` emits the SKUs of every follow-up page.
    """

    name = 'sku'
    # Frequently used URLs
    index_url = 'https://www.hsysmall.com/index.html'
    sku_list_url = 'https://www.hsysmall.com/goods/searchProduct2.html?title='

    start_urls = [index_url]

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        Args:
            batchNo: batch number forwarded unchanged to ``BaseSpider``.
            *args: extra positional arguments forwarded to ``BaseSpider``.
            **kwargs: extra keyword arguments forwarded to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Page-count threshold: a query reporting this many pages or more is
        # split by price instead of being paginated directly.
        self.max_page_limit = 200
        # Not read anywhere in this class; the original comments equate the
        # limit with 5000 items (200 pages * 25) -- presumably the listing
        # page size, TODO confirm.
        self.page_size = 25

    def start_requests(self):
        """Yield the single request for the index page (catalog source)."""
        yield Request(
            url=self.index_url,
            headers={
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                'accept-encoding': 'gzip, deflate, br',
                'accept-language': 'zh-CN,zh;q=0.9',
                'cache-control': 'max-age=0',
                'content-type': 'application/x-www-form-urlencoded',
                'origin': 'https://www.hsysmall.com',
                'referer': 'https://www.hsysmall.com',
                'upgrade-insecure-requests': '1',
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3766.400 QQBrowser/10.6.4163.400',
            },
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            callback=self.parse_catalog,
            errback=self.error_back,
        )

    def parse_catalog(self, response):
        """Emit the catalog and start one listing crawl per supplier.

        Args:
            response: response of the index page.

        Yields:
            A ``Box('catalog', ...)`` when categories were parsed, followed by
            a page-1 SKU listing request for each configured supplier.
        """
        # Categories
        cats = parse_catalog(response)
        if cats:
            self.logger.info('品类: count[%s]' % len(cats))
            yield Box('catalog', self.batch_no, cats)

        settings = get_project_settings()
        # Mapping of supplier id -> supplier name taken from project settings.
        suppliers = settings.getdict('SUPPLIERS', {})
        self.logger.info('供应商: sp=%s' % suppliers)
        for sp_id, sp_name in suppliers.items():
            yield self._build_sku_req(
                sp_id=sp_id,
                sp_name=sp_name,
                page=1,
                callback=self.parse_sku_list
            )

    def _build_sku_req(self, sp_id, sp_name, page, callback, min_price=-1, max_price=-1):
        """Build the POST request for one page of a supplier's SKU listing.

        Args:
            sp_id: supplier id, sent as the ``supps`` form field.
            sp_name: supplier name, only carried along in ``meta``.
            page: 1-based page number to request.
            callback: method that will handle the response.
            min_price: lower price bound; only sent when greater than 0.
            max_price: upper price bound; only sent when greater than 0.

        Returns:
            A ``scrapy.FormRequest`` whose ``meta`` records the page, the
            supplier and the (unfiltered) price bounds.
        """
        form_data = {
            'pageNo': str(page),
            'supps': sp_id,
            'priceMin': '',
            'priceMax': '',
            'sort': '2D',
            'showStock': '',
        }
        # Bounds <= 0 (including the -1 default) mean "no filter" and are sent
        # as empty strings.
        if min_price > 0:
            form_data['priceMin'] = str(min_price)
        if max_price > 0:
            form_data['priceMax'] = str(max_price)
        return scrapy.FormRequest(
            url=self.sku_list_url,
            method="POST",
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'page': page,
                'spId': sp_id,
                'spName': sp_name,
                'priceMin': min_price,
                'priceMax': max_price,
            },
            headers={
                'Host': 'www.hsysmall.com',
                'Connection': 'keep-alive',
                'Cache-Control': 'max-age=0',
                'Origin': 'https://www.hsysmall.com',
                'Upgrade-Insecure-Requests': '1',
                'Content-Type': 'application/x-www-form-urlencoded',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                'Referer': 'https://www.hsysmall.com/goods/searchProduct2.html?title=',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'zh-CN,zh;q=0.9',
            },
            cookies={
                # NOTE(review): hard-coded session id committed to source; it
                # will expire and should come from settings or a login step.
                'SESSION': 'd0da0c43-8bae-40c1-8c97-ae24dafc4659',
            },
            formdata=form_data,
            callback=callback,
            errback=self.error_back,
            priority=60,
        )

    def _build_page_reqs(self, sp_id, sp_name, last_page, min_price, max_price):
        """Yield listing requests for pages 2..last_page of one query."""
        for page in range(2, last_page + 1):
            yield self._build_sku_req(
                page=page,
                sp_id=sp_id,
                sp_name=sp_name,
                min_price=min_price,
                max_price=max_price,
                callback=self.parse_more_sku
            )

    def parse_sku_list(self, response):
        """Handle page 1 of a listing query and schedule the remaining work.

        Args:
            response: page-1 response of a request built by ``_build_sku_req``.

        Yields:
            A ``Box('sku', ...)`` with the SKUs of this page, then either the
            requests for the remaining pages or new page-1 requests for
            narrower price ranges.
        """
        meta = response.meta
        cur_page = meta.get("page")
        sp_id = meta.get("spId")
        sp_name = meta.get("spName")
        cur_min_price = meta.get('priceMin', -1)
        cur_max_price = meta.get('priceMax', -1)

        # First page
        sku_list = parse_sku_list(response)
        if not sku_list:
            self.logger.info('空页1: sp=%s, min=%s, max=%s, page=%s' % (sp_id, cur_min_price, cur_max_price, cur_page))
            return

        self.logger.info('清单1: sp=%s, min=%s, max=%s, page=%s, count=%s' % (sp_id, cur_min_price, cur_max_price, cur_page, len(sku_list)))
        yield Box('sku', self.batch_no, sku_list)

        # presumably the total page count of the current query -- verify
        # against hsysmall.rules.parse_query_info
        total_pages = parse_query_info(response)
        if total_pages < self.max_page_limit:
            # Below the limit: request the remaining pages directly.
            self.logger.info('页数1: sp=%s, min=%s, max=%s, ttp=%s' % (sp_id, cur_min_price, cur_max_price, total_pages))
            for req in self._build_page_reqs(sp_id, sp_name, total_pages, cur_min_price, cur_max_price):
                yield req
            return

        # At or above the limit: narrow the query by price and start over.
        if cur_max_price <= 0:
            # No price filter yet: introduce the initial price ladder.
            self.logger.info('阶梯3: sp=%s, ttp=%s' % (sp_id, total_pages))
            ladders = [
                {'min': 0, 'max': 100},
                {'min': 100, 'max': 1000},
                {'min': 1000, 'max': 5000},
                {'min': 5000, 'max': 100000000},
            ]
            for ladder in ladders:
                yield self._build_sku_req(
                    sp_id=sp_id,
                    sp_name=sp_name,
                    page=1,
                    min_price=ladder.get('min'),
                    max_price=ladder.get('max'),
                    callback=self.parse_sku_list
                )
        elif cur_max_price - cur_min_price <= 1:
            # The range cannot be narrowed any further: bisecting would give
            # half == max, i.e. a request identical to the one just answered,
            # which Scrapy's default duplicate filter drops (losing every page
            # after the first) or, with the filter disabled, repeats forever.
            # Page through the listing instead, up to the page limit.
            last_page = min(total_pages, self.max_page_limit)
            self.logger.info('页数5: sp=%s, min=%s, max=%s, ttp=%s' % (sp_id, cur_min_price, cur_max_price, total_pages))
            for req in self._build_page_reqs(sp_id, sp_name, last_page, cur_min_price, cur_max_price):
                yield req
        else:
            # Bisect the current price range; the midpoint is rounded up and
            # shared by both halves.
            half_price = cur_min_price + math.ceil((cur_max_price - cur_min_price) / 2)
            yield self._build_sku_req(
                sp_id=sp_id,
                sp_name=sp_name,
                page=1,
                min_price=cur_min_price,
                max_price=half_price,
                callback=self.parse_sku_list
            )
            yield self._build_sku_req(
                sp_id=sp_id,
                sp_name=sp_name,
                page=1,
                min_price=half_price,
                max_price=cur_max_price,
                callback=self.parse_sku_list
            )
            self.logger.info('折半4: sp=%s, ttp=%s, half=%s' % (sp_id, total_pages, half_price))

    # Handle follow-up pages of the SKU listing
    def parse_more_sku(self, response):
        """Emit the SKUs of a follow-up listing page.

        Args:
            response: response of a request built by ``_build_sku_req`` for
                a page after the first.

        Yields:
            A ``Box('sku', ...)`` when the page contains SKUs; nothing
            otherwise (an empty page is only logged).
        """
        meta = response.meta
        sp_id = meta.get("spId")
        cur_page = meta.get('page', 1)
        cur_min_price = meta.get('priceMin', -1)
        cur_max_price = meta.get('priceMax', -1)
        # Both bounds negative means the query ran without a price filter;
        # this only selects which log format is used.
        unfiltered = cur_min_price < 0 and cur_max_price < 0
        # SKUs
        sku_list = parse_sku_list(response)
        if sku_list:
            if unfiltered:
                self.logger.info('清单21: sp=%s, page=%s, cnt=%s' % (sp_id, cur_page, len(sku_list)))
            else:
                self.logger.info('清单22: sp=%s, min=%s, max=%s, page=%s, cnt=%s' % (sp_id, cur_min_price, cur_max_price, cur_page, len(sku_list)))
            yield Box('sku', self.batch_no, sku_list)
        else:
            if unfiltered:
                self.logger.info('空页21: sp=%s, page=%s' % (sp_id, cur_page))
            else:
                self.logger.info('空页22: sp=%s, min=%s, max=%s, page=%s' % (sp_id, cur_min_price, cur_max_price, cur_page))
